Get link for data:
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Locate the download link for the "by_Date" file on the Michigan
# coronavirus page; the result is left in `finallink`.
URL = "https://www.michigan.gov/coronavirus/0,9753,7-406-98163_98173---,00.html"
# Use the response as a context manager so the connection is closed
# even if reading or decoding fails.
with urlopen(URL) as response:
    HTML = response.read().decode("utf-8")
# Only the markup between these two markers is searched for links.
start_index = HTML.find("shortdesc")
end_index = HTML.find("footerArea")
data = HTML[start_index:end_index]
soup = BeautifulSoup(data, features="html.parser")
links = [link.get('href') for link in soup.find_all('a')]
# link.get('href') is None for anchors without an href attribute; skip
# those so the substring test cannot raise TypeError.
date_links = [i for i in links if i and "by_Date" in i]
if not date_links:
    # Fail with an explicit message instead of a bare IndexError when the
    # page layout changes and no matching link is present.
    raise RuntimeError("No 'by_Date' link found at " + URL)
finallink = "https://michigan.gov" + date_links[0]
Download data:
# Fetch the workbook found by the Python chunk (`py$finallink`) and read it.
# Make sure the target directory exists; download.file() does not create it.
dir.create("data", showWarnings = FALSE, recursive = TRUE)
# mode = "wb": an .xlsx file is binary, and download.file()'s default text
# mode corrupts binary downloads on Windows.
download.file(py$finallink, destfile = "data/covid.xlsx", mode = "wb")
mi_data <- readxl::read_excel("data/covid.xlsx")
Clean data:
# Pull the data frame read by the R chunk into Python.
mi_data = r.mi_data
# Latest value of the "Updated" column, used later for display.
max_date = max(mi_data["Updated"])
# One row per Date with every numeric column summed. numeric_only=True
# makes the exclusion of non-numeric columns (such as "Updated") explicit;
# recent pandas versions raise on them instead of dropping them silently.
agg_data = mi_data.groupby(["Date"], as_index=False).sum(numeric_only=True)
# Bring the per-date aggregate computed in Python back into R.
mi_cases_by_day <- py$agg_data
# Display string (day, abbreviated month, year) for the latest update.
date_update <- format(py$max_date, "%d %b %Y")
# Preview the first rows of the aggregate.
head(mi_cases_by_day)
## Date Cases Deaths Cases.Cumulative Deaths.Cumulative
## 1 2020-02-29 19:00:00 14 0 14 0
## 2 2020-03-01 19:00:00 13 1 27 1
## 3 2020-03-02 19:00:00 22 0 49 1
## 4 2020-03-03 19:00:00 24 0 73 1
## 5 2020-03-04 19:00:00 26 0 99 1
## 6 2020-03-05 19:00:00 42 0 141 1
Initial Data Visualization:
# Cases against Date; no trace type or mode is given, so plotly chooses
# them itself.
mi_cases_by_day %>%
  plot_ly(x = ~Date, y = ~Cases)
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
With 7 day moving average:
# Add centred 7-row moving averages of Cases and Deaths.
# fill = NA (rather than 0) pads the first and last three rows, where a
# full centred window does not exist; padding with 0 would draw the
# average falling to zero at both ends of the series.
mi_cases_by_day <- mi_cases_by_day %>%
  mutate(
    cases_ma = rollapply(Cases, 7, mean, align = "center", fill = NA),
    deaths_ma = rollapply(Deaths, 7, mean, align = "center", fill = NA)
  )
# Cases per Date with the moving-average column drawn as a line on top.
mi_cases_by_day %>%
  plot_ly(x = ~Date) %>%
  add_trace(y = ~Cases) %>%
  add_lines(y = ~cases_ma)